library(Matrix)
library(scran)
library(Rtsne)
library(irlba)
library(cowplot)
library(biomaRt)
source("/nfs/research1/marioni/jonny/chimera-t/scripts/core_functions.R")
load_data(remove_doublets = TRUE, remove_stripped = TRUE, remove_pool2 = TRUE)
nPC = 50
In this script, we perform batch correction on our data.
For batch correction, we employ the scran function fastMNN, which performs batch correction in the manner of mnnCorrect, but in the PC-space, and much faster. Critically, this is a composition-aware batch-correction strategy that should not be affected by the lack of e.g. extraembryonic tissue in the Tomato+ samples (as the injected stem cells do not contribute to these lineages). We correct within each timepoint only.
hvgs_E7.5 = getHVGs(scater::normalize(sce[,meta$stage == "E7.5"]))
hvgs_E8.5 = getHVGs(scater::normalize(sce[,meta$stage == "E8.5"]))
tab = table(meta$sample[meta$stage == "E7.5"])
#perform batch correction within each genotype, then across the genotypes
correct1 = doBatchCorrect(counts = logcounts(scater::normalize(sce[hvgs_E7.5, meta$stage == "E7.5"])),
timepoints = meta$tomato[meta$stage == "E7.5"],
samples = meta$sample[meta$stage == "E7.5"],
timepoint_order = as.logical(c("TRUE", "FALSE")),
sample_order = as.numeric(names(tab[order(tab, decreasing = TRUE)])))
tab = table(meta$sample[meta$stage == "E8.5"])
correct2 = doBatchCorrect(counts = logcounts(scater::normalize(sce[hvgs_E8.5, meta$stage == "E8.5"])),
timepoints = meta$tomato[meta$stage == "E8.5"],
samples = meta$sample[meta$stage == "E8.5"],
timepoint_order = as.logical(c("TRUE", "FALSE")),
sample_order = as.numeric(names(tab[order(tab, decreasing = TRUE)])))
corrected = list("E7.5" = correct1,
"E8.5" = correct2)
saveRDS(corrected, file = "/nfs/research1/marioni/jonny/chimera-t/data/corrected_pcas.rds")
A t-SNE visualisation of our cells, pre- and post-correction, is shown in Figure 1.
base_7.5 = prcomp_irlba(t(logcounts(scater::normalize(sce[hvgs_E7.5, meta$stage == "E7.5"]))), n = nPC)$x
base_8.5 = prcomp_irlba(t(logcounts(scater::normalize(sce[hvgs_E8.5, meta$stage == "E8.5"]))), n = nPC)$x
tsne_pre_7.5 = Rtsne(base_7.5, pca = FALSE)$Y
tsne_post_7.5 = Rtsne(corrected$E7.5, pca = FALSE)$Y
tsne_pre_8.5 = Rtsne(base_8.5, pca = FALSE)$Y
tsne_post_8.5 = Rtsne(corrected$E8.5, pca = FALSE)$Y
ro1 = sample(nrow(base_7.5), nrow(base_7.5))
ro2 = sample(nrow(base_8.5), nrow(base_8.5))
p1 = ggplot(as.data.frame(tsne_pre_7.5)[ro1,], aes(x = V1, y = V2, col = factor(meta$sample[meta$stage == "E7.5"][ro1]))) +
geom_point(size = 0.4) +
scale_colour_manual(values = sample_colours, name = "Sample") +
theme(legend.position = "none",
axis.text = element_blank(),
axis.ticks = element_blank()) +
ggtitle("E7.5, pre-correction") +
labs(x= "tSNE1", y= "tSNE2")
p2 = ggplot(as.data.frame(tsne_post_7.5)[ro1,], aes(x = V1, y = V2, col = factor(meta$sample[meta$stage == "E7.5"][ro1]))) +
geom_point(size = 0.4) +
scale_colour_manual(values = sample_colours, name = "Sample") +
theme(legend.position = "none",
axis.text = element_blank(),
axis.ticks = element_blank()) +
ggtitle("E7.5, post-correction") +
labs(x= "tSNE1", y= "tSNE2")
p3 = ggplot(as.data.frame(tsne_pre_8.5)[ro2,], aes(x = V1, y = V2, col = factor(meta$sample[meta$stage == "E8.5"][ro2]))) +
geom_point(size = 0.4) +
scale_colour_manual(values = sample_colours, name = "Sample") +
theme(legend.position = "none",
axis.text = element_blank(),
axis.ticks = element_blank()) +
ggtitle("E8.5, pre-correction") +
labs(x= "tSNE1", y= "tSNE2")
p4 = ggplot(as.data.frame(tsne_post_8.5)[ro2,], aes(x = V1, y = V2, col = factor(meta$sample[meta$stage == "E8.5"][ro2]))) +
geom_point(size = 0.4) +
scale_colour_manual(values = sample_colours, name = "Sample") +
theme(legend.position = "none",
axis.text = element_blank(),
axis.ticks = element_blank()) +
ggtitle("E8.5, post-correction") +
labs(x= "tSNE1", y= "tSNE2")
plot_grid(p1, p2, p3, p4)
Figure 1: t-SNE of cells before and after correction
Cells in red colours are Tomato+ (injected), cells in grey colours are Tomato- (embryo). Different shades of colour indicate difference samples.
Figure 2 shows the same plots, but coloured by the mapped celltype (see the mapping script for details).
tsne_7.5 = Rtsne(corrected[["E7.5"]], pca = FALSE)$Y
tsne_8.5 = Rtsne(corrected[["E8.5"]], pca = FALSE)$Y
ro1 = sample(sum(meta$stage == "E7.5"), sum(meta$stage == "E7.5"))
ro2 = sample(sum(meta$stage == "E8.5"), sum(meta$stage == "E8.5"))
plegend = ggplot(as.data.frame(tsne_7.5)[ro1,], aes(x = V1, y = V2, col = factor(meta$celltype.mapped[meta$stage == "E7.5"][ro1], levels = names(celltype_colours), ordered = TRUE))) +
geom_point() +
scale_colour_manual(values = celltype_colours, drop = FALSE, name = "") +
theme(axis.text = element_blank(),
axis.ticks = element_blank()) +
guides(col = guide_legend(override.aes = list(size = 5), ncol = 3))
p1 = ggplot(as.data.frame(tsne_7.5)[ro1,], aes(x = V1, y = V2, col = factor(meta$celltype.mapped[meta$stage == "E7.5"][ro1], levels = names(celltype_colours), ordered = TRUE))) +
geom_point() +
scale_colour_manual(values = celltype_colours, name = "") +
theme(legend.position = "none",
axis.text = element_blank(),
axis.ticks = element_blank()) +
ggtitle("E7.5") +
labs(x = "tSNE1", y = "tSNE2")
p2 = ggplot(as.data.frame(tsne_8.5)[ro2,], aes(x = V1, y = V2, col = factor(meta$celltype.mapped[meta$stage == "E8.5"][ro2], levels = names(celltype_colours), ordered = TRUE))) +
geom_point() +
scale_colour_manual(values = celltype_colours) +
theme(legend.position = "none",
axis.text = element_blank(),
axis.ticks = element_blank()) +
ggtitle("E8.5") +
labs(x = "tSNE1", y = "tSNE2")
plot_grid(p1, get_legend(plegend), p2, ncol = 1, rel_heights = c(1,0.6,1))
Figure 2: t-SNEs, coloured by celltype
Finally, we generate UMAP coordinates of the batch-corrected data.
umap_7.5 = getUMAP(corrected[["E7.5"]])
umap_8.5 = getUMAP(corrected[["E8.5"]])
p1 = ggplot(as.data.frame(umap_7.5)[ro1,], aes(x = V1, y = V2, col = factor(meta$celltype.mapped[meta$stage == "E7.5"][ro1], levels = names(celltype_colours), ordered = TRUE))) +
geom_point(size = 0.3) +
scale_colour_manual(values = celltype_colours) +
theme(legend.position = "none",
axis.text = element_blank(),
axis.ticks = element_blank()) +
ggtitle("E7.5") +
labs(x = "UMAP1", y = "UMAP2")
p2 = ggplot(as.data.frame(umap_8.5)[ro2,], aes(x = V1, y = V2, col = factor(meta$celltype.mapped[meta$stage == "E8.5"][ro2], levels = names(celltype_colours), ordered = TRUE))) +
geom_point(size = 0.3) +
scale_colour_manual(values = celltype_colours) +
theme(legend.position = "none",
axis.text = element_blank(),
axis.ticks = element_blank()) +
ggtitle("E8.5") +
labs(x = "UMAP1", y = "UMAP2")
plot_grid(p1, get_legend(plegend), p2, ncol = 1, rel_heights = c(1,0.6,1))
ggsave(p1, file = "/nfs/research1/marioni/jonny/chimera-t/scripts/batch_correct/umap_7.5.pdf",
width = 5, height = 5)
ggsave(p2, file = "/nfs/research1/marioni/jonny/chimera-t/scripts/batch_correct/umap_8.5.pdf",
width = 5, height = 5)
saveRDS(list("E7.5" = umap_7.5,
"E8.5" = umap_8.5),
file = "/nfs/research1/marioni/jonny/chimera-t/data/umaps.rds")
And below, the cells are split by their Tomato status.
meta$tomatoText = ifelse(meta$tomato, "Tom+", "Tom-")
p1 = ggplot(data = meta[meta$stage == "E7.5",],
mapping = aes(x = umap_7.5[ro1,1],
y = umap_7.5[ro1,2],
col = factor(celltype.mapped[ro1],
levels = names(celltype_colours),
ordered = TRUE))) +
geom_point(size = 0.3) +
scale_colour_manual(values = celltype_colours) +
theme(legend.position = "none",
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.title = element_blank()) +
ggtitle("E7.5") +
facet_wrap(~tomatoText[ro1])
p2 = ggplot(data = meta[meta$stage == "E8.5",],
mapping = aes(x = umap_8.5[ro2,1],
y = umap_8.5[ro2,2],
col = factor(celltype.mapped[ro2],
levels = names(celltype_colours),
ordered = TRUE))) +
geom_point(size = 0.3) +
scale_colour_manual(values = celltype_colours) +
theme(legend.position = "none",
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.title = element_blank()) +
ggtitle("E8.5") +
facet_wrap(~tomatoText[ro2])
plot_grid(p1, p2, ncol = 1)
ggsave(p1, file = "/nfs/research1/marioni/jonny/chimera-t/scripts/batch_correct/umap_7.5_split.pdf",
width = 10, height = 5)
ggsave(p2, file = "/nfs/research1/marioni/jonny/chimera-t/scripts/batch_correct/umap_8.5_split.pdf",
width = 10, height = 5)
These chimeras contain two differenct clones of the T knockout (i.e., different disruptions of the T genes). These are shown in Figure 3 for all samples.
meta$clone = sapply(meta$sample,
switch,
"B6KO",
"B6HOST",
"B6KO",
"B6HOST",
"B6KO",
"B6HOST",
"B6KO",
"B6HOST",
"C6KO",
"C6HOST",
"B6KO",
"B6HOST",
"C6KO",
"C6HOST",
"C6KO",
"C6HOST")
meta_8.5= meta[meta$stage == "E8.5",]
plots_8.5 = lapply(unique(meta_8.5$clone), function(x){
ggplot() +
geom_point(mapping = aes(x = umap_8.5[,1],
y = umap_8.5[,2]),
col = "lightgrey") +
geom_point(mapping = aes(x = umap_8.5[meta_8.5$clone==x,1],
y = umap_8.5[meta_8.5$clone==x,2],
col = meta_8.5$celltype.mapped[meta_8.5$clone==x])) +
scale_color_manual(values = celltype_colours) +
theme(legend.position = "none",
axis.text = element_blank(),
axis.ticks = element_blank()) +
labs(x = "UMAP1", y = "UMAP2") +
ggtitle(paste0(x, " (E8.5)"))
})
meta_7.5= meta[meta$stage == "E7.5",]
plots_7.5 = lapply(unique(meta_7.5$clone), function(x){
ggplot() +
geom_point(mapping = aes(x = umap_7.5[,1],
y = umap_7.5[,2]),
col = "lightgrey") +
geom_point(mapping = aes(x = umap_7.5[meta_7.5$clone==x,1],
y = umap_7.5[meta_7.5$clone==x,2],
fill = meta_7.5$celltype.mapped[meta_7.5$clone==x]),
pch = 21, col = "black") +
scale_fill_manual(values = celltype_colours) +
theme(legend.position = "none",
axis.text = element_blank(),
axis.ticks = element_blank()) +
labs(x = "UMAP1", y = "UMAP2") +
ggtitle(paste0(x, " (E7.5)"))
})
grid = plot_grid(plotlist= c(plots_7.5, plots_8.5), ncol = 2)
grid
Figure 3: Location of different clones in the UMAPs
save_plot(grid, file = "/nfs/research1/marioni/jonny/chimera-t/scripts/batch_correct/clone.pdf", base_width = 8, base_height = 16)
sessionInfo()
## R version 3.5.3 (2019-03-11)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/local/lib/R/lib/libRblas.so
## LAPACK: /usr/local/lib/R/lib/libRlapack.so
##
## locale:
## [1] C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] scales_1.0.0 scater_1.10.1
## [3] knitr_1.23 biomaRt_2.38.0
## [5] cowplot_0.9.4 ggplot2_3.1.1
## [7] irlba_2.3.3 Rtsne_0.15
## [9] scran_1.10.2 SingleCellExperiment_1.4.1
## [11] SummarizedExperiment_1.12.0 DelayedArray_0.8.0
## [13] matrixStats_0.54.0 Biobase_2.42.0
## [15] GenomicRanges_1.34.0 GenomeInfoDb_1.18.2
## [17] IRanges_2.16.0 S4Vectors_0.20.1
## [19] BiocGenerics_0.28.0 BiocParallel_1.16.6
## [21] Matrix_1.2-17 BiocStyle_2.10.0
## [23] rmarkdown_1.13
##
## loaded via a namespace (and not attached):
## [1] bitops_1.0-6 bit64_0.9-7
## [3] RcppAnnoy_0.0.12 RColorBrewer_1.1-2
## [5] progress_1.2.2 httr_1.4.0
## [7] dynamicTreeCut_1.63-1 tools_3.5.3
## [9] R6_2.4.0 HDF5Array_1.10.1
## [11] vipor_0.4.5 uwot_0.1.3
## [13] DBI_1.0.0 lazyeval_0.2.2
## [15] colorspace_1.4-1 withr_2.1.2
## [17] tidyselect_0.2.5 gridExtra_2.3
## [19] prettyunits_1.0.2 bit_1.1-14
## [21] compiler_3.5.3 BiocNeighbors_1.0.0
## [23] labeling_0.3 bookdown_0.11
## [25] stringr_1.4.0 digest_0.6.19
## [27] XVector_0.22.0 pkgconfig_2.0.2
## [29] htmltools_0.3.6 highr_0.8
## [31] limma_3.38.3 rlang_0.3.4
## [33] RSQLite_2.1.1 DelayedMatrixStats_1.4.0
## [35] dplyr_0.8.1 RCurl_1.95-4.12
## [37] magrittr_1.5 GenomeInfoDbData_1.2.0
## [39] Rcpp_1.0.1 ggbeeswarm_0.6.0
## [41] munsell_0.5.0 Rhdf5lib_1.4.3
## [43] viridis_0.5.1 stringi_1.4.3
## [45] yaml_2.2.0 edgeR_3.24.3
## [47] zlibbioc_1.28.0 rhdf5_2.26.2
## [49] plyr_1.8.4 grid_3.5.3
## [51] blob_1.1.1 crayon_1.3.4
## [53] lattice_0.20-38 hms_0.4.2
## [55] locfit_1.5-9.1 pillar_1.4.1
## [57] igraph_1.2.4.1 codetools_0.2-16
## [59] reshape2_1.4.3 XML_3.98-1.20
## [61] glue_1.3.1 evaluate_0.14
## [63] RcppParallel_4.4.3 BiocManager_1.30.4
## [65] gtable_0.3.0 purrr_0.3.2
## [67] assertthat_0.2.1 xfun_0.7
## [69] RSpectra_0.15-0 viridisLite_0.3.0
## [71] tibble_2.1.3 AnnotationDbi_1.44.0
## [73] beeswarm_0.2.3 memoise_1.1.0
## [75] statmod_1.4.32